Tweet
Login
Mathematics Crystal
You may switch between
tex
and
pdf
by changing the end of the URL.
Home
About Us
Materials
Site Map
Questions and Answers
Skills
Topic Notes
HSC
Integration
Others
Tangent
UBC
UNSW
Calculus Advanced
Challenges
Complex Numbers
Conics
Differentiation
Integration
Linear Algebra
Mathematical Induction
Motion
Others
Polynomial Functions
Probability
Sequences and Series
Trigonometry
/
Topics /
Probability /
Discrete Probability Distributions.tex
--Quick Links--
The Number Empire
Wolfram Mathematica online integrator
FooPlot
Calc Matthen
Walter Zorn
Quick Math
Lists of integrals
List of integrals of trigonometric functions
PDF
\documentclass[10pt]{article} \usepackage{amssymb,amsmath} \usepackage[hmargin=1cm,vmargin=1cm]{geometry} \begin{document} {\large Discrete Probability Distributions} \begin{align*} \text{\bf Discrete }&\text{\bf Random Variables: \rm A random variable $X$ is \bf discrete \rm if the possible values of $X$ are countable.}\\ &\text{If these values are }\{x_1,x_2,\ldots,x_k,\ldots,x_n\}\text{ and }p_k=P(X=x_k),\text{ then }F_X(x)=\sum_{k:x_k\le x}p_k,~~0\le p_k\le 1\text{ and }\sum_k p_k=1.\\ &\text{Expected Value (or mean):}\quad\boxed{\mathrm{E}(X)=\sum_k x_kp_k.}\\ &\quad\text{Note 1: As the number of trials approaches infinity, the expected value approaches the arithmetic mean.}\\ &\quad\text{Note 2: Consider an $n$-dimensional space with a random variable vector $\mathbf{x}=(x_1,\ldots,x_n)$ and a probability vector}\\ &\quad\quad\text{$\mathbf{p}=(p_1,\ldots,p_n)$,\quad$\mathrm{E}(X)=\mathbf{x}\cdot\mathbf{p}$.}\\ \\ &\text{Theorem:\quad If }Y=g(X),\quad \mathrm{E}(Y)=\mathrm{E}(g(X))=\sum_k g(x_k)p_k. 
\qquad\text{(If $g$ is linear, $\mathrm{E}(g(X))=g(\mathrm{E}(X))$.)}\\ % &\text{Variance:}\quad\boxed{\mathrm{Var}(X)=\mathrm{E}\big((X-\mathrm{E}(X))^2\big)=\sum_k\big(x_k-\mu\big)^2p_k,\quad\text{where $\mu=\mathrm{E}(X)$}.}\\ &\quad\text{Note: Variance is ``produced'' by taking the difference of the random variable and the expected value $\big(X-\mathrm{E}(X)\big)$,}\\ &\quad\quad\text{squaring it $\big(X-\mathrm{E}(X)\big)^2$; then the expected value of this new random variable is the variance.}\\ \\ &\text{Standard Deviation}:\quad\boxed{\mathrm{SD}(X)=\sqrt{\mathrm{Var}(X)}.}\\ \\ &\text{Theorem:}\quad\mathrm{Var}(X)=\mathrm{E}(X^2)-\big(\mathrm{E}(X)\big)^2.\\ &\quad\text{Proof: Let $\mu=\mathrm{E}(X)$ and $g(X)=(X-\mu)^2$, then}\quad \mathrm{Var}(X) =\sum_k(x_k-\mu)^2p_k =\sum_k(x_k^2-2x_k\mu+\mu^2)~p_k\\&\qquad =\sum_k x_k^2p_k-2\mu\sum_k x_k p_k+\mu^2\sum_k p_k =\mathrm{E}(X^2)-2\mu^2+\mu^2=\mathrm{E}(X^2)-\big(\mathrm{E}(X)\big)^2.\\ \\ &\text{Lemma}:\quad\mathrm{E}\left(\sum_{r=0}^n a_r X^r\right) =\sum_{r=0}^n a_r\mathrm{E}(X^r).\\ &\quad\text{Proof:}\quad \text{LHS} =\sum_k\left(\sum_{r=0}^n a_r x_k^r\right)p_k =\sum_{r=0}^n a_r\sum_k x_k^r p_k =\sum_{r=0}^n a_r\mathrm{E}(X^r) =\text{RHS}.\\ % &\text{Theorem:}\quad \mathrm{E}(aX+b)=a\mathrm{E}(X)+b,~~ \mathrm{Var}(aX+b)=a^2\mathrm{Var}(X),~~ \mathrm{SD}(aX+b)=|a|\mathrm{SD}(X).~~ \text{($a$ and $b$ are constants.)}\\ &\quad\text{Proof:}\quad \mathrm{E}(aX+b)=a\mathrm{E}(X)+b \text{ is the result of the previous lemma}.\\ &\qquad \mathrm{Var}(aX+b) =\mathrm{E}\Big(\big((aX+b)-\mathrm{E}(aX+b)\big)^2\Big) =\mathrm{E}\Big(\big(aX+b-a\mathrm{E}(X)-b\big)^2\Big) =\mathrm{E}\big(a^2(X-\mathrm{E}(X))^2\big)\\&\qquad\qquad =a^2\mathrm{E}\big((X-\mathrm{E}(X))^2\big) =a^2\mathrm{Var}(X).\\ &\qquad \mathrm{SD}(aX+b) =\sqrt{\mathrm{Var}(aX+b)} =\sqrt{a^2\mathrm{Var}(X)} =|a|\sqrt{\mathrm{Var}(X)} =|a|\mathrm{SD}(X). 
\end{align*} \begin{align*} \text{\bf The }&\text{\bf Binomial Distribution: }\boxed{B(n,p,k)=\binom{n}{k}p^k(1-p)^{n-k}\quad\text{ for }n\in\mathbb{N}\text{ and }k=0,1,\ldots,n.}\\ &\text{This represents the probability of exactly $k$ occurrences, each with probability $p$, in $n$ trials.}\\ &\text{Let $q=1-p$.}\quad\sum_k B(n,p,k)=\sum_{k=0}^n B(n,p,k)=\sum_{k=0}^n\binom{n}{k}p^k q^{n-k}=(p+q)^n=1.\\ &\text{If $X$ is the random variable that measures the number of occurrences in a Bernoulli process with $n$ trials,}\\ &\text{each with probability $p$, then $X$ has the binomial distribution $B(n,p)$. We write $X\sim B(n,p)$. ($x_k=k$.)}\\ &\text{Note: Do not confuse the $n$ trials with the $n+1$ possible outcomes $k=0,1,\ldots,n$, which include the case of no occurrences.}\\ % % &\boxed{\mathrm{E}(X)=np.}\quad \text{Proof: }p+q=1,\quad\frac{d}{dp}\left(\frac{p}{q}\right)=\frac{~1~}{q^2},\quad\frac{d}{dp}\left(\frac{p}{q}\right)^k=k\left(\frac{p}{q}\right)^{k-1}\frac{1}{q^2},\qquad k\left(\frac{p}{q}\right)^{k-1}=q^2\frac{d}{dp}\left(\frac{p}{q}\right)^k.\\ &\quad\mathrm{E}(X) =\sum_{k=0}^n x_k p_k =\sum_{k=0}^n k\binom{n}{k}p^k q^{n-k} =pq^{n-1}\sum_{k=0}^n \binom{n}{k}k\left(\frac{p}{q}\right)^{k-1} =pq^{n-1}\sum_{k=0}^n \binom{n}{k}q^2\frac{d}{dp}\left(\frac{p}{q}\right)^k\\&\quad =pq^{n+1}\frac{d}{dp}\left[\sum_{k=0}^n \binom{n}{k}\left(\frac{p}{q}\right)^k\right] =pq^{n+1}\frac{d}{dp}\left[1+\frac{p}{q}\right]^n =pq^{n+1}\frac{d}{dp}\left[\frac{~1~}{q}\right]^n =pq^{n+1}\frac{n}{{q}^{n+1}} =np.\\ \\ &\boxed{\mathrm{Var}(X)=npq=np(1-p).}\\ &\quad\text{Proof:}\quad \frac{d^2}{dp^2}\left(\frac{p}{q}\right)^k =\frac{d}{dp}\left[k\left(\frac{p}{q}\right)^{k-1}\frac{1}{q^2}\right] =\frac{k}{q^2}\frac{d}{dp}\left[\left(\frac{p}{q}\right)^{k-1}\right]+k\left(\frac{p}{q}\right)^{k-1}\frac{d}{dp}\frac{1}{q^2}\\&\qquad =\frac{k(k-1)}{q^4}\left(\frac{p}{q}\right)^{k-2}+2k\left(\frac{p}{q}\right)^{k-1}\frac{1}{q^3} 
=\frac{k^2}{q^4}\left(\frac{p}{q}\right)^{k-2}-\frac{k}{q^4}\left(\frac{p}{q}\right)^{k-2}+2k\left(\frac{p}{q}\right)^{k-1}\frac{1}{q^3}\\&\qquad =\frac{k^2}{q^4}\left(\frac{p}{q}\right)^{k-2}-k\left(\frac{p}{q}\right)^{k-1}\left(\frac{1}{q^4}\left(\frac{p}{q}\right)^{-1}-\frac{2}{q^3}\right) =\frac{k^2}{q^4}\left(\frac{p}{q}\right)^{k-2}-q^2\frac{d}{dp}\left(\frac{p}{q}\right)^k\cdot \left(\frac{1}{q^4}\left(\frac{p}{q}\right)^{-1}-\frac{2}{q^3}\right).\\&\qquad =\frac{k^2}{q^4}\left(\frac{p}{q}\right)^{k-2}-\frac{1}{q^4}\frac{d}{dp}\left(\frac{p}{q}\right)^k\cdot \left(\frac{q^3}{p}-2q^3\right).\\ \\ &\qquad k^2\left(\frac{p}{q}\right)^{k-2} =q^4\frac{d^2}{dp^2}\left(\frac{p}{q}\right)^k+\frac{d}{dp}\left(\frac{p}{q}\right)^k\cdot\left(\frac{q^3}{p}-2q^3\right).\\ \\ &\qquad\text{Let }\mu=\mathrm{E}(X)=np,\quad \mathrm{Var}(X) =\mathrm{E}(X^2)-\big(\mathrm{E}(X)\big)^2 =\left[\sum_{k=0}^n x_k^2p_k\right]-(np)^2 =\left[\sum_{k=0}^n k^2\binom{n}{k}p^k q^{n-k}\right]-(np)^2\\&\qquad =p^2q^{n-2}\left[\sum_{k=0}^n\binom{n}{k}k^2\left(\frac{p}{q}\right)^{k-2}\right]-(np)^2\\&\qquad =p^2q^{n-2}\left[\sum_{k=0}^n\binom{n}{k}\left[q^4\frac{d^2}{dp^2}\left(\frac{p}{q}\right)^k+\frac{d}{dp}\left(\frac{p}{q}\right)^k\cdot\left(\frac{q^3}{p}-2q^3\right)\right]\right]-(np)^2\\&\qquad =p^2q^{n-2}\left[q^4\frac{d^2}{dp^2}\sum_{k=0}^n\binom{n}{k}\left(\frac{p}{q}\right)^k+\left(\frac{q^3}{p}-2q^3\right)\cdot\frac{d}{dp}\sum_{k=0}^n\binom{n}{k}\left(\frac{p}{q}\right)^k\right]-(np)^2\\&\qquad =p^2q^{n-2}\left[q^4\frac{d^2}{dp^2}\left(1+\frac{p}{q}\right)^n+\left(\frac{q^3}{p}-2q^3\right)\cdot\frac{d}{dp}\left(1+\frac{p}{q}\right)^n\right]-(np)^2\\&\qquad =p^2q^{n-2}\left[q^4\frac{d^2}{dp^2}q^{-n}+\left(\frac{q^3}{p}-2q^3\right)\cdot\frac{d}{dp}q^{-n}\right]-(np)^2\\&\qquad =p^2q^{n-2}\left[q^4n(n+1)q^{-n-2}+\left(\frac{q^3}{p}-2q^3\right)nq^{-n-1}\right]-(np)^2\\&\qquad =p^2q^{n-2}q^4n(n+1)q^{-n-2}+p^2q^{n-2}\frac{q^3}{p}nq^{-n-1}-p^2q^{n-2}\cdot 2q^3nq^{-n-1}-(np)^2\\&\qquad 
=n(n+1)p^2+np-2np^2-n^2p^2 =n^2p^2+np^2+np-2np^2-n^2p^2 =np-np^2=np(1-p). \end{align*} % % \begin{align*} \text{\bf The }&\text{\bf Geometric Distribution: }\boxed{G(p,k)=(1-p)^{k-1}p\quad\text{ for }k=1,2,\ldots~.}\\ &\text{This represents the probability of the first occurrence happening on the $k$th trial, each trial with probability $p$.}\\ &\text{Let $q=1-p$.}\quad\sum_{k=1}^\infty G(p,k)=\sum_{k=1}^\infty q^{k-1}p=p\sum_{k=0}^\infty q^k=p\cdot\frac{1}{1-q}=1.\\ &\text{Consider an infinite Bernoulli process of trials each of which has a probability of $p$. If the random variable $X$}\\ &\text{measures the number of trials conducted until the first occurrence, then $X$ has the geometric distribution $G(p)$.}\\ &\text{We write $X\sim G(p)$. ($x_k=k$.)}\\ &\boxed{\mathrm{E}(X)=\frac{1}{p}.}\\ &\quad\text{Proof: }\mathrm{E}(X) =\sum_{k=1}^\infty x_k p_k =\sum_{k=1}^\infty kq^{k-1}p =\sum_{k=1}^\infty\frac{d}{dq}q^k\cdot p =p\cdot\frac{d}{dq}\sum_{k=1}^\infty q^k =p\cdot\frac{d}{dq}\left(\frac{q}{1-q}\right) =p\cdot\frac{d}{dq}\left(\frac{q}{p}\right) =p\cdot\frac{p+q}{p^2} =\frac{1}{p}.\\ \\ &\boxed{\mathrm{Var}(X)=\frac{q}{p^2}=\frac{1-p}{p^2}.}\\ &\quad\text{Proof:}\quad \frac{d}{dq}q^k=kq^{k-1},\quad \frac{d^2}{dq^2}q^k =k(k-1)q^{k-2} =(k^2q^{k-1}-kq^{k-1})q^{-1},\\ &\qquad\qquad k^2q^{k-1} =q\frac{d^2}{dq^2}q^k+kq^{k-1} =q\frac{d^2}{dq^2}q^k+\frac{d}{dq}q^k.\\ \\ &\qquad\mathrm{Var}(X) =\mathrm{E}(X^2)-\big(\mathrm{E}(X)\big)^2 =\left(\sum_{k=1}^\infty x_k^2 p_k\right)-\left(\tfrac{1}{p}\right)^2 =\left(\sum_{k=1}^\infty k^2q^{k-1}p\right)-p^{-2}\\ &\qquad =p\sum_{k=1}^\infty\left(q\frac{d^2}{dq^2}q^k+\frac{d}{dq}q^k\right)-p^{-2} =p\left(q\frac{d^2}{dq^2}\sum_{k=1}^\infty q^k+\frac{d}{dq}\sum_{k=1}^\infty q^k\right)-p^{-2} =p\left(q\frac{d^2}{dq^2}\left(\frac{q}{1-q}\right)+\frac{d}{dq}\left(\frac{q}{1-q}\right)\right)-p^{-2}\\&\qquad =p\left(q\frac{d^2}{dq^2}\left(\frac{q}{p}\right)+\frac{d}{dq}\left(\frac{q}{p}\right)\right)-p^{-2} 
=p\left(q\cdot\frac{2}{p^3}+\frac{1}{p^2}\right)-p^{-2} =\frac{2q}{p^2}+\frac{p}{p^2}-\frac{1}{p^2} =\frac{2q+p-1}{p^2} =\frac{q}{p^2} =\frac{1-p}{p^2}.\\ \\ &\text{Theorem:}\quad P(X>n)=(1-p)^n=q^n,\quad\text{where $X\sim G(p),~~n=1,2,\ldots$.}\\ &\quad\text{Proof:}\quad P(X>n)=\sum_{k=n+1}^\infty q^{k-1}p=pq^n\sum_{k=0}^\infty q^k=pq^n\cdot\frac{1}{1-q}=pq^n\cdot\frac{1}{p}=q^n. \end{align*} \end{document}